library(tidyverse)
library(leaflet)
library(sf)
library(geoR)
library(fields)
library(maps)
library(mapdata)
library(units)
data.dir = gsub('/code', '/data', getwd())
library(geoR)
# Define spatial parameters for the Matérn function
kappa <- 0.5 # Smoothness
phi <- 0.1 # Range
sigma2 <- 1 # Partial sill
tau2 <- 0.1 # Nugget
# Simulate Gaussian random field using Matérn covariance
grf_simulation <- grf(n = 100, grid = "reg", cov.model = "matern",
cov.pars = c(sigma2, phi), nugget = tau2, kappa = kappa)
# Plot the simulated field
plot(grf_simulation)
variogram_empirical <- variog(grf_simulation)
set.seed(123)
n <- 200
coords <- cbind(runif(n, 0, 10), runif(n, 0, 10))
grf_data <- grf(n, grid=coords, cov.model="matern", cov.pars=c(20, 5), kappa=0.5, nugget = 0.5)
## grf: simulation on a set of locations provided by the user
## grf: process with 1 covariance structure(s)
## grf: nugget effect is: tausq= 0.5
## grf: covariance model 1 is: exponential(sigmasq=20, phi=5)
## grf: decomposition algorithm used is: cholesky
## grf: End of simulation procedure. Number of realizations: 1
matern_simulation<-variog(grf_data,option="cloud")
## variog: computing omnidirectional variogram
plot(matern_simulation,xlab="Distance (h)", main= 'semivariance of all pairs of point')
matern_simulation_bin<-variog(grf_data,uvec=seq(0,matern_simulation$max.dist,l=20),option="bin", estimator.type="modulus")
## variog: computing omnidirectional variogram
plot(matern_simulation_bin, xlab="Distance (h)",main = "empirical semi-variogram")
By eyeball, the parameters are:
1. sigma2 <- sill-nugget = 20.5-0.5 = 20
2. phi <- 6
3. tau2 <- 0.5
The eyeballed estimate of the spatial parameters conform with the intuition given the spatial process I simulated. In the simulation process, we set sigma2=20, phi=5, tau2= 0.5, which are similar to the eyeballed estimates.
######### Matern function #########
#ols
matern_ols=variofit(matern_simulation_bin, ini.cov.pars=c(20, 5),nugget=0.5,fix.nugget=FALSE,max.dist=10,cov.model='matern', weights="equal")
## variofit: covariance model used is matern
## variofit: weights used: equal
## variofit: minimisation function used: optim
plot(matern_simulation_bin, xlab="Distance (h)",main = "empirical semi-variogram")
lines(matern_ols,col="blue",lwd=1.5)
summary(matern_ols)
## $pmethod
## [1] "OLS (ordinary least squares)"
##
## $cov.model
## [1] "matern"
##
## $spatial.component
## sigmasq phi
## 23.863447 4.817991
##
## $spatial.component.extra
## kappa
## 0.5
##
## $nugget.component
## tausq
## 0.224963
##
## $fix.nugget
## [1] FALSE
##
## $fix.kappa
## [1] TRUE
##
## $practicalRange
## [1] 14.43342
##
## $sum.of.squares
## value
## 23.17898
##
## $estimated.pars
## tausq sigmasq phi
## 0.224963 23.863447 4.817991
##
## $weights
## [1] "equal"
##
## $call
## variofit(vario = matern_simulation_bin, ini.cov.pars = c(20,
## 5), cov.model = "matern", fix.nugget = FALSE, nugget = 0.5,
## max.dist = 10, weights = "equal")
##
## attr(,"class")
## [1] "summary.variomodel"
######### Matern function #########
# wls
matern_wls=variofit(matern_simulation_bin,ini.cov.pars=c(20,5),nugget=0.5,fix.nugget=FALSE,cov.model='matern',weights='cressie')
## variofit: covariance model used is matern
## variofit: weights used: cressie
## variofit: minimisation function used: optim
plot(matern_simulation_bin, xlab="Distance (h)",main = "empirical semi-variogram")
lines(matern_wls,col="blue",lwd=1.5)
summary(matern_wls)
## $pmethod
## [1] "WLS (weighted least squares)"
##
## $cov.model
## [1] "matern"
##
## $spatial.component
## sigmasq phi
## 28.59475 7.00123
##
## $spatial.component.extra
## kappa
## 0.5
##
## $nugget.component
## tausq
## 0.90476
##
## $fix.nugget
## [1] FALSE
##
## $fix.kappa
## [1] TRUE
##
## $practicalRange
## [1] 20.97383
##
## $sum.of.squares
## value
## 172.4118
##
## $estimated.pars
## tausq sigmasq phi
## 0.90476 28.59475 7.00123
##
## $weights
## [1] "cressie"
##
## $call
## variofit(vario = matern_simulation_bin, ini.cov.pars = c(20,
## 5), cov.model = "matern", fix.nugget = FALSE, nugget = 0.5,
## weights = "cressie")
##
## attr(,"class")
## [1] "summary.variomodel"
######### Matern function #########
# MLE
matern_mle=likfit(grf_data,ini.cov.pars=c(20,5),nugget=0.5, fix.nugget=FALSE, cov.model='matern', lik.method='ML')
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
plot(matern_simulation_bin, xlab="Distance (h)",main = "empirical semi-variogram")
lines(matern_mle,col="blue",lwd=1.5)
summary(matern_mle)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta
## 2.8329
##
## Parameters of the spatial component:
## correlation function: exponential
## (estimated) variance parameter sigmasq (partial sill) = 16
## (estimated) cor. fct. parameter phi (range parameter) = 4.067
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 0.5115
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 12.18473
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-403" "4" "813.9" "827.1"
##
## non spatial model:
## log.L n.params AIC BIC
## "-543" "2" "1090" "1097"
##
## Call:
## likfit(geodata = grf_data, ini.cov.pars = c(20, 5), fix.nugget = FALSE,
## nugget = 0.5, cov.model = "matern", lik.method = "ML")
######### Matern function #########
# Restricted MLE
matern_reml=likfit(grf_data,ini.cov.pars=c(20,5),nugget=0.5, fix.nugget=FALSE, cov.model='matern', lik.method='REML')
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
plot(matern_simulation_bin, xlab="Distance (h)",main = "empirical semi-variogram")
lines(matern_reml,col="blue",lwd=1.5)
summary(matern_reml)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: restricted maximum likelihood
##
## Parameters of the mean component (trend):
## beta
## 2.2292
##
## Parameters of the spatial component:
## correlation function: exponential
## (estimated) variance parameter sigmasq (partial sill) = 23.99
## (estimated) cor. fct. parameter phi (range parameter) = 6.256
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 0.5274
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 18.74193
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-398.5" "4" "805.1" "818.2"
##
## non spatial model:
## log.L n.params AIC BIC
## "-540.8" "2" "1086" "1092"
##
## Call:
## likfit(geodata = grf_data, ini.cov.pars = c(20, 5), fix.nugget = FALSE,
## nugget = 0.5, cov.model = "matern", lik.method = "REML")
| Matern Function | tau(nugget) | sigma2(sill-nugget) | phi(range) | SSE | AIC | BIC |
| True values | 0.5 | 20 | 5 | |||
| OLS Estimated Values | 0.22 | 23.86 | 4.81 | 23.18 | ||
| WLS Estimated Values | 0.90 | 28.59 | 7.00 | 172.41 | ||
| ML Estimated Values | 0.51 | 16 | 4.07 | 813.9 | 827.1 | |
| REML Estimated Values | 0.53 | 23.99 | 6.26 | 805.1 | 818.2 |
######### Gaussian function #########
#ols
gaussian_ols=variofit(matern_simulation_bin, ini.cov.pars=c(20,5),nugget=0.5,fix.nugget=FALSE,max.dist=10,cov.model='gaussian', weights="equal")
## variofit: covariance model used is gaussian
## variofit: weights used: equal
## variofit: minimisation function used: optim
plot(matern_simulation_bin, xlab="Distance (h)",main = "empirical semi-variogram")
lines(gaussian_ols,col="blue",lwd=1.5)
summary(gaussian_ols)
## $pmethod
## [1] "OLS (ordinary least squares)"
##
## $cov.model
## [1] "gaussian"
##
## $spatial.component
## sigmasq phi
## 17.063207 3.802742
##
## $spatial.component.extra
## kappa
## 0.5
##
## $nugget.component
## tausq
## 2.582269
##
## $fix.nugget
## [1] FALSE
##
## $fix.kappa
## [1] TRUE
##
## $practicalRange
## [1] 6.581853
##
## $sum.of.squares
## value
## 16.94854
##
## $estimated.pars
## tausq sigmasq phi
## 2.582269 17.063207 3.802742
##
## $weights
## [1] "equal"
##
## $call
## variofit(vario = matern_simulation_bin, ini.cov.pars = c(20,
## 5), cov.model = "gaussian", fix.nugget = FALSE, nugget = 0.5,
## max.dist = 10, weights = "equal")
##
## attr(,"class")
## [1] "summary.variomodel"
######### Gaussian function #########
# wls
gaussian_wls=variofit(matern_simulation_bin,ini.cov.pars=c(20,5),nugget=0.5,fix.nugget=FALSE,max.dist=10,cov.model='gaussian',weights='cressie')
## variofit: covariance model used is gaussian
## variofit: weights used: cressie
## variofit: minimisation function used: optim
plot(matern_simulation_bin, xlab="Distance (h)",main = "empirical semi-variogram")
lines(gaussian_wls,col="blue",lwd=1.5)
summary(gaussian_wls)
## $pmethod
## [1] "WLS (weighted least squares)"
##
## $cov.model
## [1] "gaussian"
##
## $spatial.component
## sigmasq phi
## 16.543795 4.008784
##
## $spatial.component.extra
## kappa
## 0.5
##
## $nugget.component
## tausq
## 3.482518
##
## $fix.nugget
## [1] FALSE
##
## $fix.kappa
## [1] TRUE
##
## $practicalRange
## [1] 6.938475
##
## $sum.of.squares
## value
## 117.3202
##
## $estimated.pars
## tausq sigmasq phi
## 3.482518 16.543795 4.008784
##
## $weights
## [1] "cressie"
##
## $call
## variofit(vario = matern_simulation_bin, ini.cov.pars = c(20,
## 5), cov.model = "gaussian", fix.nugget = FALSE, nugget = 0.5,
## max.dist = 10, weights = "cressie")
##
## attr(,"class")
## [1] "summary.variomodel"
######### Gaussian function #########
# MLE
gaussian_mle=likfit(grf_data,ini.cov.pars=c(20,5),nugget=0.5, fix.nugget=FALSE, cov.model='gaussian', lik.method='ML')
## kappa not used for the gaussian correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
plot(matern_simulation_bin, xlab="Distance (h)",main = "empirical semi-variogram")
lines(gaussian_mle,col="blue",lwd=1.5)
summary(gaussian_mle)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta
## 3.8215
##
## Parameters of the spatial component:
## correlation function: gaussian
## (estimated) variance parameter sigmasq (partial sill) = 10.31
## (estimated) cor. fct. parameter phi (range parameter) = 2.393
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 2.353
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 4.141494
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-415.4" "4" "838.8" "852"
##
## non spatial model:
## log.L n.params AIC BIC
## "-543" "2" "1090" "1097"
##
## Call:
## likfit(geodata = grf_data, ini.cov.pars = c(20, 5), fix.nugget = FALSE,
## nugget = 0.5, cov.model = "gaussian", lik.method = "ML")
######### Gaussian function #########
# Restricted MLE
gaussian_reml=likfit(grf_data,ini.cov.pars=c(20,5),nugget=0.5, fix.nugget=FALSE, cov.model='gaussian', lik.method='REML')
## kappa not used for the gaussian correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
plot(matern_simulation_bin, xlab="Distance (h)",main = "empirical semi-variogram")
lines(gaussian_reml,col="blue",lwd=1.5)
summary(gaussian_reml)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: restricted maximum likelihood
##
## Parameters of the mean component (trend):
## beta
## 3.7748
##
## Parameters of the spatial component:
## correlation function: gaussian
## (estimated) variance parameter sigmasq (partial sill) = 11.07
## (estimated) cor. fct. parameter phi (range parameter) = 2.455
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 2.369
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 4.249548
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-411.8" "4" "831.5" "844.7"
##
## non spatial model:
## log.L n.params AIC BIC
## "-540.8" "2" "1086" "1092"
##
## Call:
## likfit(geodata = grf_data, ini.cov.pars = c(20, 5), fix.nugget = FALSE,
## nugget = 0.5, cov.model = "gaussian", lik.method = "REML")
| Gaussian Function | ||||||
| tau(nugget) | sigma2(sill-nugget) | phi(range) | SSE | AIC | BIC | |
| True values | 0.5 | 20 | 5 | |||
| OLE Estimated Values | 2.58 | 17.06 | 3.80 | 16.94 | ||
| WLS Estimated Values | 3.48 | 16.54 | 4.01 | 117.32 | ||
| ML Estimated Values | 2.35 | 10.31 | 2.39 | 838.8 | 852 | |
| REML Estimated Values | 2.37 | 11.07 | 2.46 | 831.5 | 844.7 |
For both Matern and Gaussian functions, estimated Values by OLS are
the most close to the true value. Second accurate method is WLS, and
then REML, and then ML.
And the estimates using Gaussian functions are all less accurate than
Matern functions.
set.seed(1007249951)
n <- 200 # Number of points
coords1 <- cbind(runif(n, 0, 10), runif(n, 0, 10))
grf_data1 <- grf(n, grid=coords1, cov.model="matern", cov.pars=c(20, 5), kappa=0.5, nugget = 0.5)
## grf: simulation on a set of locations provided by the user
## grf: process with 1 covariance structure(s)
## grf: nugget effect is: tausq= 0.5
## grf: covariance model 1 is: exponential(sigmasq=20, phi=5)
## grf: decomposition algorithm used is: cholesky
## grf: End of simulation procedure. Number of realizations: 1
set.seed(10072499)
n <- 200 # Number of points
coords2 <- cbind(runif(n, 0, 10), runif(n, 0, 10))
grf_data2 <- grf(n, grid=coords2, cov.model="matern", cov.pars=c(20, 5), kappa=0.5, nugget = 0.5)
## grf: simulation on a set of locations provided by the user
## grf: process with 1 covariance structure(s)
## grf: nugget effect is: tausq= 0.5
## grf: covariance model 1 is: exponential(sigmasq=20, phi=5)
## grf: decomposition algorithm used is: cholesky
## grf: End of simulation procedure. Number of realizations: 1
#The chosen model is matern funciton using OLS
######### Matern function #########
#ols
#For the first dataset:
matern_simulation1<-variog(grf_data1,option="cloud")
## variog: computing omnidirectional variogram
matern_simulation_bin1<-variog(grf_data1,uvec=seq(0,matern_simulation1$max.dist,l=20),option="bin", estimator.type="modulus")
## variog: computing omnidirectional variogram
gaussian_ols_grf1=variofit(matern_simulation_bin1, ini.cov.pars=c(20,5),nugget=0.5,fix.nugget=FALSE,max.dist =8, cov.model='matern', weights="equal")
## variofit: covariance model used is matern
## variofit: weights used: equal
## variofit: minimisation function used: optim
## Warning in variofit(matern_simulation_bin1, ini.cov.pars = c(20, 5), nugget =
## 0.5, : unreasonable initial value for sigmasq + nugget (too low)
plot(matern_simulation_bin1, xlab="Distance (h)",main = "empirical semi-variogram")
lines(gaussian_ols_grf1,col="blue",lwd=1.5)
summary(gaussian_ols_grf1)
## $pmethod
## [1] "OLS (ordinary least squares)"
##
## $cov.model
## [1] "matern"
##
## $spatial.component
## sigmasq phi
## 23.370365 5.350494
##
## $spatial.component.extra
## kappa
## 0.5
##
## $nugget.component
## tausq
## 0.9126484
##
## $fix.nugget
## [1] FALSE
##
## $fix.kappa
## [1] TRUE
##
## $practicalRange
## [1] 16.02866
##
## $sum.of.squares
## value
## 12.7838
##
## $estimated.pars
## tausq sigmasq phi
## 0.9126484 23.3703649 5.3504941
##
## $weights
## [1] "equal"
##
## $call
## variofit(vario = matern_simulation_bin1, ini.cov.pars = c(20,
## 5), cov.model = "matern", fix.nugget = FALSE, nugget = 0.5,
## max.dist = 8, weights = "equal")
##
## attr(,"class")
## [1] "summary.variomodel"
######### Matern function #########
#ols
#For the second dataset:
matern_simulation2<-variog(grf_data2,option="cloud")
## variog: computing omnidirectional variogram
matern_simulation_bin2<-variog(grf_data2,uvec=seq(0,matern_simulation2$max.dist,l=20),option="bin", estimator.type="modulus")
## variog: computing omnidirectional variogram
gaussian_ols_grf2=variofit(matern_simulation_bin2, ini.cov.pars=c(20,5),nugget=0.5,fix.nugget=FALSE,max.dist =10,cov.model='matern', weights="equal")
## variofit: covariance model used is matern
## variofit: weights used: equal
## variofit: minimisation function used: optim
plot(matern_simulation_bin2, xlab="Distance (h)",main = "empirical semi-variogram")
lines(gaussian_ols_grf2,col="blue",lwd=1.5)
summary(gaussian_ols_grf2)
## $pmethod
## [1] "OLS (ordinary least squares)"
##
## $cov.model
## [1] "matern"
##
## $spatial.component
## sigmasq phi
## 55.43823 13.35784
##
## $spatial.component.extra
## kappa
## 0.5
##
## $nugget.component
## tausq
## 0
##
## $fix.nugget
## [1] FALSE
##
## $fix.kappa
## [1] TRUE
##
## $practicalRange
## [1] 40.0165
##
## $sum.of.squares
## value
## 61.69628
##
## $estimated.pars
## tausq sigmasq phi
## 0.00000 55.43823 13.35784
##
## $weights
## [1] "equal"
##
## $call
## variofit(vario = matern_simulation_bin2, ini.cov.pars = c(20,
## 5), cov.model = "matern", fix.nugget = FALSE, nugget = 0.5,
## max.dist = 10, weights = "equal")
##
## attr(,"class")
## [1] "summary.variomodel"
| True value | Model using the simulated data in (a) | Model using the first newly simulated data | Model using the second newly simulated data | |
|---|---|---|---|---|
| tau2 | 0.5 | 0.22 | 0.91 | 0.00 |
| sigma2 | 20 | 23.86 | 23.37 | 55.43 |
| phi | 5 | 4.81 | 5.35 | 13.35 |
For the first simulated data, the estimates are very similar to the original ones from (a). But the estimates from the second simulated data are very different. This results show that parameter estimates from these models depend greatly on the simulated dataset.
Additionally, I also find out the argument ‘max.dist=’ has a huge effect on the estimates.
dorian <- read.csv("/Users/davegong/Desktop/year4 fall/sta465/HW2/dorian.csv")
head(dorian)
## lat lon temp dew.point ceiling.ht wind.dir wind.sp atm.press
## 1 32.483 -80.717 27.76667 21.13333 22000 315.0000 2.850000 1011.000
## 2 32.550 -80.450 25.48333 21.20000 NA 298.3333 1.083333 1010.500
## 3 32.782 -79.925 25.98333 NA NA 295.0000 3.350000 1010.067
## 4 32.899 -80.041 25.65000 19.90000 22000 321.6667 4.566667 1010.150
## 5 32.900 -80.033 25.30000 19.70000 22000 320.0000 4.650000 1010.250
## 6 33.350 -79.183 24.88333 21.51667 NA 296.6667 4.700000 1007.833
## rh
## 1 68.24173
## 2 77.99672
## 3 NA
## 4 71.31048
## 5 71.89816
## 6 81.84029
dorian = dorian %>%
st_as_sf(coords=c('lon','lat'), crs=4326, remove=FALSE) %>% #set datum
st_transform(crs=32617) %>% #project to UTM 11 (meters)
mutate(x = st_coordinates(.)[,1]/1000, y = st_coordinates(.)[,2]/1000) # extract x, y in km
head(dorian)
## Simple feature collection with 6 features and 11 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 526589.8 ymin: 3594010 xmax: 669074.5 ymax: 3691563
## Projected CRS: WGS 84 / UTM zone 17N
## lat lon temp dew.point ceiling.ht wind.dir wind.sp atm.press
## 1 32.483 -80.717 27.76667 21.13333 22000 315.0000 2.850000 1011.000
## 2 32.550 -80.450 25.48333 21.20000 NA 298.3333 1.083333 1010.500
## 3 32.782 -79.925 25.98333 NA NA 295.0000 3.350000 1010.067
## 4 32.899 -80.041 25.65000 19.90000 22000 321.6667 4.566667 1010.150
## 5 32.900 -80.033 25.30000 19.70000 22000 320.0000 4.650000 1010.250
## 6 33.350 -79.183 24.88333 21.51667 NA 296.6667 4.700000 1007.833
## rh geometry x y
## 1 68.24173 POINT (526589.8 3594010) 526.5898 3594.010
## 2 77.99672 POINT (551638.3 3601535) 551.6383 3601.535
## 3 NA POINT (600670.7 3627631) 600.6707 3627.631
## 4 71.31048 POINT (589689.4 3640498) 589.6894 3640.498
## 5 71.89816 POINT (590436.7 3640616) 590.4367 3640.616
## 6 81.84029 POINT (669074.5 3691563) 669.0745 3691.563
dorian1 <- dorian %>% st_drop_geometry()
dorian1$logwind<-log(dorian1$wind.sp)
head(dorian1)
## lat lon temp dew.point ceiling.ht wind.dir wind.sp atm.press
## 1 32.483 -80.717 27.76667 21.13333 22000 315.0000 2.850000 1011.000
## 2 32.550 -80.450 25.48333 21.20000 NA 298.3333 1.083333 1010.500
## 3 32.782 -79.925 25.98333 NA NA 295.0000 3.350000 1010.067
## 4 32.899 -80.041 25.65000 19.90000 22000 321.6667 4.566667 1010.150
## 5 32.900 -80.033 25.30000 19.70000 22000 320.0000 4.650000 1010.250
## 6 33.350 -79.183 24.88333 21.51667 NA 296.6667 4.700000 1007.833
## rh x y logwind
## 1 68.24173 526.5898 3594.010 1.04731899
## 2 77.99672 551.6383 3601.535 0.08004271
## 3 NA 600.6707 3627.631 1.20896035
## 4 71.31048 589.6894 3640.498 1.51878354
## 5 71.89816 590.4367 3640.616 1.53686722
## 6 81.84029 669.0745 3691.563 1.54756251
dorian1_wind<-as.geodata(dorian1,coords.col=c(10,11),data.col=7)
semi_var_wind<-variog(dorian1_wind,option="cloud")
## variog: computing omnidirectional variogram
semi_var_wind_bin<-variog(dorian1_wind,uvec=seq(0,365,l=30),option="bin",estimator.type="modulus")
## variog: computing omnidirectional variogram
plot(semi_var_wind_bin,xlab="Distance (h)",main = "empirical semi-variogram")
#lines(gaussian_ols_grf2,col="blue",lwd=1.5)
#Gaussian
#WLS
wind_gaussian_wls=variofit(semi_var_wind_bin,ini.cov.pars=c(25,300),nugget=0.5,fix.nugget=FALSE,max.dist=270, cov.model='gaussian',weights='cressie')
## variofit: covariance model used is gaussian
## variofit: weights used: cressie
## variofit: minimisation function used: optim
summary(wind_gaussian_wls)
## $pmethod
## [1] "WLS (weighted least squares)"
##
## $cov.model
## [1] "gaussian"
##
## $spatial.component
## sigmasq phi
## 5731.805 6087.683
##
## $spatial.component.extra
## kappa
## 0.5
##
## $nugget.component
## tausq
## 2.841106
##
## $fix.nugget
## [1] FALSE
##
## $fix.kappa
## [1] TRUE
##
## $practicalRange
## [1] 10536.67
##
## $sum.of.squares
## value
## 180.6322
##
## $estimated.pars
## tausq sigmasq phi
## 2.841106 5731.805334 6087.683331
##
## $weights
## [1] "cressie"
##
## $call
## variofit(vario = semi_var_wind_bin, ini.cov.pars = c(25, 300),
## cov.model = "gaussian", fix.nugget = FALSE, nugget = 0.5,
## max.dist = 270, weights = "cressie")
##
## attr(,"class")
## [1] "summary.variomodel"
plot(semi_var_wind_bin,xlab="Distance (h)",main = "empirical semi-variogram")
lines(wind_gaussian_wls,col="blue",lwd=1.5)
#Gaussian
#ML
wind_gaussian_mle=likfit(dorian1_wind,ini.cov.pars=c(20,500),nugget=0.5, fix.nugget=FALSE, cov.model='gaussian', lik.method='ML', limits = pars.limits(phi = c(0, 400), sigmasq = c(0, 40)))
## kappa not used for the gaussian correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
summary(wind_gaussian_mle)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta
## 10.3318
##
## Parameters of the spatial component:
## correlation function: gaussian
## (estimated) variance parameter sigmasq (partial sill) = 43.3
## (estimated) cor. fct. parameter phi (range parameter) = 400
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 3.486
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 692.3274
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-137.8" "4" "283.6" "292"
##
## non spatial model:
## log.L n.params AIC BIC
## "-189.5" "2" "383.1" "387.3"
##
## Call:
## likfit(geodata = dorian1_wind, ini.cov.pars = c(20, 500), fix.nugget = FALSE,
## nugget = 0.5, cov.model = "gaussian", lik.method = "ML",
## limits = pars.limits(phi = c(0, 400), sigmasq = c(0, 40)))
plot(semi_var_wind_bin,xlab="Distance (h)",main = "empirical semi-variogram")
lines(wind_gaussian_mle,col="blue",lwd=1.5)
#Exponential
#WLS
wind_Exponential_wls=variofit(semi_var_wind_bin,ini.cov.pars=c(4,500),nugget=0.5,fix.nugget=FALSE,max.dist=300,cov.model='exponential',weights='cressie')
## variofit: covariance model used is exponential
## variofit: weights used: cressie
## variofit: minimisation function used: optim
## Warning in variofit(semi_var_wind_bin, ini.cov.pars = c(4, 500), nugget = 0.5,
## : unreasonable initial value for sigmasq + nugget (too low)
summary(wind_Exponential_wls)
## $pmethod
## [1] "WLS (weighted least squares)"
##
## $cov.model
## [1] "exponential"
##
## $spatial.component
## sigmasq phi
## 29925.72 590422.90
##
## $spatial.component.extra
## kappa
## 0.5
##
## $nugget.component
## tausq
## 0.6263036
##
## $fix.nugget
## [1] FALSE
##
## $fix.kappa
## [1] TRUE
##
## $practicalRange
## [1] 1768749
##
## $sum.of.squares
## value
## 238.4918
##
## $estimated.pars
## tausq sigmasq phi
## 6.263036e-01 2.992572e+04 5.904229e+05
##
## $weights
## [1] "cressie"
##
## $call
## variofit(vario = semi_var_wind_bin, ini.cov.pars = c(4, 500),
## cov.model = "exponential", fix.nugget = FALSE, nugget = 0.5,
## max.dist = 300, weights = "cressie")
##
## attr(,"class")
## [1] "summary.variomodel"
plot(semi_var_wind_bin,xlab="Distance (h)",main = "empirical semi-variogram")
lines(wind_Exponential_wls,col="blue",lwd=1.5)
#Exponential
#ML
wind_Exponential_mle=likfit(dorian1_wind,ini.cov.pars=c(10,500),nugget=0.5, fix.nugget=FALSE, cov.model='exponential', lik.method='ML',limits = pars.limits(phi = c(0, 500), sigmasq = c(0, 40)))
## kappa not used for the exponential correlation function
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
summary(wind_Exponential_mle)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta
## 9.2403
##
## Parameters of the spatial component:
## correlation function: exponential
## (estimated) variance parameter sigmasq (partial sill) = 43.57
## (estimated) cor. fct. parameter phi (range parameter) = 500
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 0.7215
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 1497.866
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-138.1" "4" "284.2" "292.7"
##
## non spatial model:
## log.L n.params AIC BIC
## "-189.5" "2" "383.1" "387.3"
##
## Call:
## likfit(geodata = dorian1_wind, ini.cov.pars = c(10, 500), fix.nugget = FALSE,
## nugget = 0.5, cov.model = "exponential", lik.method = "ML",
## limits = pars.limits(phi = c(0, 500), sigmasq = c(0, 40)))
plot(semi_var_wind_bin,xlab="Distance (h)",main = "empirical semi-variogram")
lines(wind_Exponential_mle,col="blue",lwd=1.5)
#Matern
#WLS
wind_Matern_wls=variofit(semi_var_wind_bin,ini.cov.pars=c(30,300),nugget=0.5,fix.nugget=FALSE,cov.model='matern',weights='cressie', max.dis=200)
## variofit: covariance model used is matern
## variofit: weights used: cressie
## variofit: minimisation function used: optim
summary(wind_Matern_wls)
## $pmethod
## [1] "WLS (weighted least squares)"
##
## $cov.model
## [1] "matern"
##
## $spatial.component
## sigmasq phi
## 4125.793 270416.054
##
## $spatial.component.extra
## kappa
## 0.5
##
## $nugget.component
## tausq
## 2.721624
##
## $fix.nugget
## [1] FALSE
##
## $fix.kappa
## [1] TRUE
##
## $practicalRange
## [1] 810094.1
##
## $sum.of.squares
## value
## 101.3883
##
## $estimated.pars
## tausq sigmasq phi
## 2.721624e+00 4.125793e+03 2.704161e+05
##
## $weights
## [1] "cressie"
##
## $call
## variofit(vario = semi_var_wind_bin, ini.cov.pars = c(30, 300),
## cov.model = "matern", fix.nugget = FALSE, nugget = 0.5, max.dist = 200,
## weights = "cressie")
##
## attr(,"class")
## [1] "summary.variomodel"
plot(semi_var_wind_bin,xlab="Distance (h)",main = "empirical semi-variogram")
lines(wind_Matern_wls,col="blue",lwd=1.5)
#Matern
#ML
wind_Matern_mle=likfit(dorian1_wind,ini.cov.pars=c(4,500),nugget=0.5, fix.nugget=FALSE, cov.model='matern', lik.method='ML')
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
summary(wind_Matern_mle)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta
## 9.2403
##
## Parameters of the spatial component:
## correlation function: exponential
## (estimated) variance parameter sigmasq (partial sill) = 43.57
## (estimated) cor. fct. parameter phi (range parameter) = 500
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 0.7217
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 1497.862
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-138.1" "4" "284.2" "292.7"
##
## non spatial model:
## log.L n.params AIC BIC
## "-189.5" "2" "383.1" "387.3"
##
## Call:
## likfit(geodata = dorian1_wind, ini.cov.pars = c(4, 500), fix.nugget = FALSE,
## nugget = 0.5, cov.model = "matern", lik.method = "ML")
plot(semi_var_wind_bin,xlab="Distance (h)",main = "empirical semi-variogram")
lines(wind_Matern_mle,col="blue",lwd=1.5)
| Gaussian | ||||||
| tau(nugget) | sigma2(sill-nugget) | phi(range) | SSE | AIC | BIC | |
| WLS | 2.84 | 5371.81 | 6087.68 | 180.63 | ||
| ML | 3.486 | 43.3 | 400 | 283.6 | 292 |
| Exponential | ||||||
| tau(nugget) | sigma2(sill-nugget) | phi(range) | SSE | AIC | BIC | |
| WLS | 0.63 | 29925.72 | 590422.9 | 238.4918 | ||
| ML | 0.7215 | 43.57 | 500 | 284.2 | 292.7 |
| Matern | ||||||
| tau(nugget) | sigma2(sill-nugget) | phi(range) | SSE | AIC | BIC | |
| WLS | 2.72 | 4125 | 27041 | 101.38 | ||
| ML | 0.72 | 43.57 | 500 | 284.2 | 292.7 |
Describe the fitted parameters, and compare the models. Choose what you feel is the best model for the data.
For the exponential, Gaussian, and Matern functions, the value of estimated parametres are very sensitive to max.dist. I have tried different max.dist. However, the the value estimated parametres still blow up and can not be relied on. In other words, the estimates are too far from the eye-balled estimates. Hence, the three models of Exponential, Gaussian, and Matern functions by WLS method fail.
Clearly, we can see that estimates by ML method is more stable and less sensitive to the max.dist. Also, the estimates by ML are more close the eye-balled estimates. Actually, the three models by ML method have very similar AIC and BIC, so I choose model of Matern function by ML method as the best model.
#Create the grid
#dorian1 <- dorian %>% st_drop_geometry()
#dorian1_wind<-as.geodata(dorian1,coords.col=c(10,11),data.col=7)
res=100
xs=seq(min(dorian1$x),max(dorian1$x),len=res)
ys=seq(min(dorian1$y),max(dorian1$y),len=res)
myGrid=expand.grid(xs,ys)
names(myGrid)=c('x','y')
# make sure it looks correct
plot(myGrid, pch=19, cex=0.5)
#I decide to use Matern ML
#Ordinary kriging
semi_var_wind_bin<-variog(dorian1_wind,uvec=seq(0,365,l=30),option="bin",estimator.type="modulus")
## variog: computing omnidirectional variogram
wind_Matern_mle=likfit(dorian1_wind,ini.cov.pars=c(4,500),nugget=0.5, fix.nugget=FALSE, cov.model='matern', lik.method='ML')
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
summary(wind_Matern_mle)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta
## 9.2403
##
## Parameters of the spatial component:
## correlation function: exponential
## (estimated) variance parameter sigmasq (partial sill) = 43.57
## (estimated) cor. fct. parameter phi (range parameter) = 500
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 0.7217
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 1497.862
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-138.1" "4" "284.2" "292.7"
##
## non spatial model:
## log.L n.params AIC BIC
## "-189.5" "2" "383.1" "387.3"
##
## Call:
## likfit(geodata = dorian1_wind, ini.cov.pars = c(4, 500), fix.nugget = FALSE,
## nugget = 0.5, cov.model = "matern", lik.method = "ML")
plot(semi_var_wind_bin,xlab="Distance (h)",main = "empirical semi-variogram")
lines(wind_Matern_mle,col="blue",lwd=1.5)
KCord<-krige.control(type.krige='ok',obj.m=wind_Matern_mle)
ordinary_krige<-krige.conv(dorian1_wind,locations=myGrid,krige=KCord)
## krige.conv: model with constant mean
## krige.conv: Kriging performed using global neighbourhood
#Plot the kriged estimates
image.plot(xs,ys,matrix(ordinary_krige$predict,res,res,byrow=FALSE),col=tim.colors(32), main="Ordinary Kriging Predictions")
#Plot the standard errors
image.plot(xs,ys,matrix(sqrt(ordinary_krige$krige.var),res,res,byrow=FALSE),col=tim.colors(32), main="Ordinary Kriging Errors")
ordinary_krige$beta.est
## beta
## 9.24032
estimated mean = 9.24032
Firstly, Kriging is a technique for spatial prediction, which aims to estimate the value of Z(s) at one or more unobserved locations based on observed samples. The basic steps of ordinary kriging is:
Choose a parametric model (Exponential in this question) for the semi-variance functions
Estimate the semivariogram parameters by WLS.
Make prediction, which is the weighted average of the chosen observed samples. The estimated semivariogram can indicate the strength of spatial association and therefore determines the weighting.
\(\hat{Z}\)(s) = \(\sum_{i=1}^{N}w_{i}Z(s_i)\), with constraint \(\sum_{i=1}^{N}w_{i}=1\). Also, we let \(E[\hat{Z}(s_0)] = E[Z(s_0)] =\mu\). The goal is basically to minimize \(E[(\hat{Z}(s_0)-Z(s_0))^2]\)
Ordinary Kriging requires us to estimate a mean value, \(\mu\), and assume \(\mu\) is constant so that Z(s) = \(\mu\) + \(\epsilon\)(s) for any location.
By Lagrange multiplier, we can get the equation: \(\sum_{j}w_{j}C_{ij} +\lambda = C_{i0}\)
To solve the weights:
\[ \begin{pmatrix} \omega_1 \\ \omega_2 \\ \vdots \\ \omega_N \\ \lambda \end{pmatrix} = \begin{bmatrix} C_{11} & C_{12} & \cdots & C_{1N} & 1 \\ C_{21} & C_{22} & \cdots & C_{2N} & 1 \\ \vdots & \vdots & \ddots & \vdots & \vdots \\ C_{N1} & C_{N2} & \cdots & C_{NN} & 1 \\ 1 & 1 & \cdots & 1 & 0 \end{bmatrix}^{-1} \times \begin{pmatrix} C_{10} \\ C_{20} \\ \vdots \\ C_{N0} \\ 1 \end{pmatrix} \] where \(C_{i0} = Cov[Z(s_i),Z(s_0)]\) and \(C_{ij} = Cov[Z(s_i),Z(s_j)]\)
#Explore the trends in x-direction and y-direction
plot(dorian1_wind)
We can see there is a trend along the x-direction by the plot
summary(lm(wind.sp~x+y,data=dorian1))
##
## Call:
## lm(formula = wind.sp ~ x + y, data = dorian1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0924 -1.6203 -0.4241 1.2272 8.1011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -28.251603 12.893802 -2.191 0.0325 *
## x 0.025326 0.002002 12.647 <2e-16 ***
## y 0.004874 0.003424 1.423 0.1600
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.72 on 58 degrees of freedom
## Multiple R-squared: 0.7597, Adjusted R-squared: 0.7514
## F-statistic: 91.69 on 2 and 58 DF, p-value: < 2.2e-16
summary(lm(wind.sp~x+y+I(x^2)+I(y^2)+ I(x*y),data=dorian1))
##
## Call:
## lm(formula = wind.sp ~ x + y + I(x^2) + I(y^2) + I(x * y), data = dorian1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.3253 -0.6954 0.2777 0.9132 5.6693
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.806e+00 2.828e+02 -0.017 0.987
## x 5.786e-02 7.937e-02 0.729 0.469
## y -7.662e-03 1.534e-01 -0.050 0.960
## I(x^2) 6.333e-05 8.045e-06 7.872 1.41e-10 ***
## I(y^2) 3.347e-06 2.093e-05 0.160 0.874
## I(x * y) -2.985e-05 2.155e-05 -1.386 0.171
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.847 on 55 degrees of freedom
## Multiple R-squared: 0.8949, Adjusted R-squared: 0.8853
## F-statistic: 93.61 on 5 and 55 DF, p-value: < 2.2e-16
We can see Here x is significant in linear and quadratic trend check.
#Compare linear and quadratic trend
model_linear <- likfit(dorian1_wind, ini.cov.pars = c(6, 100), nugget = 0.5,
fix.nugget = FALSE, cov.model = 'matern',
lik.method = 'ML', trend = '1st')
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
model_quadratic <- likfit(dorian1_wind, ini.cov.pars = c(6, 100), nugget = 0.5,
fix.nugget = FALSE, cov.model = 'matern',
lik.method = 'ML', trend = '2nd')
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
##
## WARNING: estimated sill is less than 1 hundredth of the total variance. Consider re-examine the model excluding spatial dependence
# For the linear model
print(AIC(model_linear))
## [1] 275.1067
# For the quadratic model
print(AIC(model_quadratic))
## [1] 259.8099
By comparing the AIC, we think linear trend is good enough for
fitting.
#Matern
#ML
wind_matern_mle_trend=likfit(dorian1_wind,ini.cov.pars=c(6,100), fix.nugget=FALSE, cov.model='matern', lik.method='ML',trend= '1st' )
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
kriged_grid_trend=krige.conv(dorian1_wind,locations=myGrid, krige=krige.control(obj.model=wind_matern_mle_trend,trend.d='1st',trend.l='1st'))
## krige.conv: model with mean given by a 1st order polynomial on the coordinates
## krige.conv: Kriging performed using global neighbourhood
summary(wind_matern_mle_trend)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta0 beta1 beta2
## -31.0111 0.0253 0.0056
##
## Parameters of the spatial component:
## correlation function: exponential
## (estimated) variance parameter sigmasq (partial sill) = 7.808
## (estimated) cor. fct. parameter phi (range parameter) = 34.99
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 0
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 104.8098
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-131.9" "6" "275.8" "288.5"
##
## non spatial model:
## log.L n.params AIC BIC
## "-146" "4" "300.1" "308.5"
##
## Call:
## likfit(geodata = dorian1_wind, trend = "1st", ini.cov.pars = c(6,
## 100), fix.nugget = FALSE, cov.model = "matern", lik.method = "ML")
kriged_grid_trend$predict<-ifelse(kriged_grid_trend$predict<0,0,kriged_grid_trend$predict)
# plot predictions
image.plot(xs,ys,matrix(kriged_grid_trend$predict,res,res,byrow=FALSE),col=tim.colors(32), main="Predictions by Universal Kriging ML Linear Trend")
##Plot the standard errors
image.plot(xs,ys,matrix(sqrt(kriged_grid_trend$krige.var),res,res,byrow=FALSE),col=tim.colors(32), main="Universal Kriging Errors")
\(Z(x,y) = \mu + \beta_{1}x + \beta_{2}y + \epsilon(x,y)\) where \(\mu\) = -31.011061546, \(\beta_{1}\) = 0.025311543, \(\beta_{2}\) = 0.005592862
kriged_grid_trend$beta.est
## beta0 beta1 beta2
## -31.011061546 0.025311543 0.005592862
res=100
xs=seq(min(dorian1$lon),max(dorian1$lon),len=res)
ys=seq(min(dorian1$lat),max(dorian1$lat),len=res)
myGrid_1=expand.grid(xs,ys)
names(myGrid_1)=c('x','y')
preds = data.frame(myGrid_1, pred.wind=kriged_grid_trend$predict %>% as.vector)
pred.pal = colorNumeric(c('darkgreen','gold1','brown'),
domain=preds$pred.wind)
leaflet(preds) %>%
addProviderTiles("CartoDB.Positron") %>%
addCircles(lng=~x, lat=~y, color=~pred.pal(pred.wind),
opacity=.7, fillOpacity=.7, radius=1e3) %>% # points w/ 1KM radius
leaflet::addLegend('bottomleft', pal=pred.pal, values=preds$pred.wind,
title='Wind speed (m/s)', opacity=1)
Continue from the description of ordinary kriging, the universal kriging differ from step 5
Universal Kriging assumes mean value varies in different locations. We assume there is a trend in x and y. i.e. Z(s) = \(\mu (s)\) + \(\epsilon\)(s), where \(\mu (s) = \sum_{k=1}^{p}\beta_{k}x_{k}(s)\)
or we can write \(Z(s) = MVN(X(s)\beta, \sum)\), where \(X(s)\beta\) is responsible for catch the trend, so that \(\sum\) can be residual small-scale variation.
In our model, we think linear trend fit is more appropriate, so this gives us the formula: \(Z(x,y) = \mu + \beta_{1}x + \beta_{2}y + \epsilon(x,y)\), which is used for prediction.
Maximum Likelihood method is used to estimate \(\beta\).
\(\mu\) = -31.011061546, \(\beta_{1}\) = 0.025311543, \(\beta_{2}\) = 0.005592862.
\(\mu\) represents the baseline value
of the trend when x=0 and y=0
\(\beta_{1}\) model the linear trends
in x-coordinate.
\(\beta_{2}\) model the linear trends
in y-coordinate.
For example, Both \(\beta_{1}\) and
\(\beta_{2}\) are positive, so this
means the wind speed increase along x-direction and y-direction.
We can see the value of \(\beta_{1}\)
is larger than \(\beta_{2}\), which
means the linear trend in x-coordinate is more significant than
y-coordinate.
set.seed(1007249951)
train_index <- sample(1:nrow(dorian), size = 0.7 * nrow(dorian))
train_set <- dorian[train_index, ] # 70% training set
test_set <- dorian[-train_index, ] # 30% test set
dorian1_train <- train_set %>% st_drop_geometry()
dorian1_wind_train<-as.geodata(dorian1_train,coords.col=c(10,11),data.col=7)
dorian1_test <- test_set %>% st_drop_geometry()
dorian1_wind_test<-as.geodata(dorian1_test,coords.col=c(10,11),data.col=7)
wind_matern_mle_trend_train=likfit(dorian1_wind_train,ini.cov.pars=c(6,100), fix.nugget=FALSE, cov.model='matern', lik.method='ML',trend= '1st' )
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
test_locations <- cbind(test_set$x, test_set$y)
kriged_grid_trend_test_prediction=krige.conv(dorian1_wind_train,locations=test_locations, krige=krige.control(obj.model=wind_matern_mle_trend_train,trend.d='1st',trend.l='1st'))
## krige.conv: model with mean given by a 1st order polynomial on the coordinates
## krige.conv: Kriging performed using global neighbourhood
#MSE
predicted_wind_speed <- kriged_grid_trend_test_prediction$predict
observed_wind_speed <- test_set$wind.sp
mse <- mean((observed_wind_speed - predicted_wind_speed)^2)
print(paste("MSE:", mse))
## [1] "MSE: 5.88579739855396"
#R2
ss_residual <- sum((observed_wind_speed - predicted_wind_speed)^2)
ss_total <- sum((observed_wind_speed - mean(observed_wind_speed))^2)
r_squared <- 1 - (ss_residual / ss_total)
print(paste("R^2:", r_squared))
## [1] "R^2: 0.829723405548776"
MSE: \(\text{MSE} = \frac{1}{n} \sum_{i=1}^{n} \left( y_i - \hat{y}_i \right)^2\)
On average, for the wind speed, the squared difference between the true value and predicted value is 5.89.
R^2: \(R^2 = 1 - \frac{\sum_{i=1}^{n} \left( y_i - \hat{y}_i \right)^2}{\sum_{i=1}^{n} \left( y_i - \bar{y} \right)^2}\)
82.97% of the variance in the variable, wind speed, can be explained by this model
library(tidyverse)
library(leaflet)
library(fields)
library(sf)
library(mgcv)
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.9-0. For overview type 'help("mgcv-package")'.
library(mgcViz)
## Loading required package: qgam
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## Registered S3 method overwritten by 'mgcViz':
## method from
## +.gg GGally
##
## Attaching package: 'mgcViz'
## The following objects are masked from 'package:stats':
##
## qqline, qqnorm, qqplot
library(nlme)
# Another predictor I choose is temperature
#lm
lm_model <- lm(wind.sp ~ x * y + temp, data = dorian)
#gls
gls_model_ratio <- gls(wind.sp ~ x * y + temp,
data = dorian,
correlation = corRatio(form = ~ x + y))
gls_model_gaus <- gls(wind.sp ~ x * y + temp,
data = dorian,
correlation = corGaus(form = ~ x + y))
gls_model_spher <- gls(wind.sp ~ x * y + temp,
data = dorian,
correlation = corSpher(form = ~ x + y))
AIC(gls_model_ratio, gls_model_gaus, gls_model_spher)
## df AIC
## gls_model_ratio 7 318.8919
## gls_model_gaus 7 322.2488
## gls_model_spher 7 307.5009
#gam
gam_model_default_knot_temp<-gam(wind.sp~s(x,y,bs="ts")+ temp,data=dorian)
plot(gam_model_default_knot_temp, main="GAM default k")
Since gls_model_spher has the smallest AIC, we decide best covariance structure is Spherical correlation structure.
summary(lm_model)
##
## Call:
## lm(formula = wind.sp ~ x * y + temp, data = dorian)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9148 -1.4290 -0.5427 1.4110 8.4799
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.985e+01 7.084e+01 0.421 0.675
## x -6.671e-02 1.031e-01 -0.647 0.520
## y -9.997e-03 1.753e-02 -0.570 0.571
## temp -1.888e-02 2.779e-01 -0.068 0.946
## x:y 2.370e-05 2.652e-05 0.894 0.375
##
## Residual standard error: 2.747 on 56 degrees of freedom
## Multiple R-squared: 0.7632, Adjusted R-squared: 0.7463
## F-statistic: 45.12 on 4 and 56 DF, p-value: < 2.2e-16
summary(gls_model_spher)$tTable
## Value Std.Error t-value p-value
## (Intercept) -1.041525e+02 4.089178e+02 -0.2547028 0.7998857001
## x 8.559991e-02 2.954113e-01 0.2897652 0.7730660878
## y 2.349574e-02 5.388313e-02 0.4360499 0.6644769320
## temp 4.178509e-01 1.135819e-01 3.6788502 0.0005274154
## x:y -1.639646e-05 7.581012e-05 -0.2162833 0.8295528951
summary(gam_model_default_knot_temp)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## wind.sp ~ s(x, y, bs = "ts") + temp
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1312 7.2448 -0.018 0.986
## temp 0.2726 0.2945 0.926 0.360
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(x,y) 19.63 29 19.37 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.915 Deviance explained = 94.4%
## GCV = 3.9274 Scale est. = 2.5351 n = 61
lm():
\(\beta_x\) = -6.671e-02, Standard
error of x = 1.031e-01
\(\beta_y\) = -9.997e-03, Standard
error of y = 1.753e-02
gls_spher():
\(\beta_x\) = 8.559991e-02, Standard
error of x = 2.954113e-01
\(\beta_y\) = 2.349574e-02, Standard
error of y = 5.388313e-02
gam():
\(\beta_{temp}\) = 0.2726, Standard
error of temp = 0.2945
There are 29 basis functions(knots) in this gam() model. These functions
constitute the smoother term, which model non-linear relationship
between location and wind speed.
The coefficients of x and y are both negative for lm(), while gls() has positive coefficients of x and y. The standard errors of x and y for lm() are smaller than standard errors of x and y for gls(). The coefficients in front of each s(x, y) represent the weights of each basis functions used for smooth modelling.
AIC(lm_model, gls_model_spher, gam_model_default_knot_temp)
## Warning in AIC.default(lm_model, gls_model_spher, gam_model_default_knot_temp):
## models are not all fitted to the same number of observations
## df AIC
## lm_model 6.00000 303.1974
## gls_model_spher 7.00000 307.5009
## gam_model_default_knot_temp 22.62519 248.4027
From the output, we can see gam() model has the smaller AIC, so GAM
has the best model fitting.
lm() assumes a linear relationship between the predictors(x,y,
temp) and wind speed.
gls() adds a spatial correlation structure by considering the
relationship between observations based on their proximity. Here we have
three different correlation structure. Essentially, each structure
models how the correlation decreases as the distance between
observations increases.
(1)corRatio: Ratio correlation structure.
(2)corGaus: Gaussian correlation structure.
(3)corSpher: Spherical correlation structure.
gam() can help to model a smoother, non-linear relationships between the predictors (x, y, temp) and wind speed using splines (thin plate splines in this question).
#0.75*61 = 46
gam_pm_large_K_temp<-gam(wind.sp~s(x,y,bs="ts",k=46, fx=TRUE)+temp,data=dorian)
plot(gam_pm_large_K_temp, main="GAM large k")
plot(gam_model_default_knot_temp, main="GAM default k")
From two contour plots, we can clearly see there are less contour lines for larger number of knots. This means the surface is smoother.This means the model becomes more flexible and can fit the data more closely. However, more knots means higher risk of over-fitting.
res=100
xs=seq(min(dorian1$x),max(dorian1$x),len=res)
ys=seq(min(dorian1$y),max(dorian1$y),len=res)
myGrid=expand.grid(xs,ys)
names(myGrid)=c('x','y')
#Default knot
#Prediction
gam_model_default_knot<-gam(wind.sp~s(x,y,bs="ts"),data=dorian)
pred_gam<-predict.gam(gam_model_default_knot,myGrid,se.fit=TRUE)
# Plot predictions
image.plot(xs,ys,matrix(pred_gam$fit,res,res),col=tim.colors(32), main="predictions by GAM large k")
points(dorian$x,dorian$y,pch=19,cex=0.3)
#plot standard errors
image.plot(xs,ys,matrix(pred_gam$se.fit,res,res),col=tim.colors(32), main="std.error by GAM large k")
points(dorian$x,dorian$y,pch=19,cex=0.3)
#Larger knot
gam_pm_large_K<-gam(wind.sp~s(x,y,bs="ts",k=46, fx=TRUE),data=dorian)
#Prediction
pred_gam_large_K<-predict.gam(gam_pm_large_K,myGrid,se.fit=TRUE)
# Plot predictions
image.plot(xs,ys,matrix(pred_gam_large_K$fit,res,res),col=tim.colors(32), main="predictions by GAM large k")
points(dorian$x,dorian$y,pch=19,cex=0.3)
#plot standard errors
image.plot(xs,ys,matrix(pred_gam_large_K$se.fit,res,res),col=tim.colors(32), main="std.errors by GAM large k")
points(dorian$x,dorian$y,pch=19,cex=0.3)
Similarly, there are less contour lines for larger number of knots. This means the surface is smoother.This means the model becomes more flexible and can fit the data more closely.
The value of covariates, such as temp, is not available across the whole grid. Therefore, for some locations where the value of temperature is unknown, the model can not predicts, because the model requires the temperature as its input.
However, the value of x and y is available everywhere. Hence, the spatial-only gam() model can predict the wind speed everywhere and create a map.
gam_residuals <- residuals(gam_pm_large_K)
lm_residuals <- residuals(lm_model)
gls_residuals <- residuals(gls_model_spher)
gam_data <- data.frame(x = dorian$x, y = dorian$y, gam_residuals = gam_residuals)
lm_data <- data.frame(x = dorian$x, y = dorian$y, lm_residuals = lm_residuals)
gls_data <- data.frame(x = dorian$x, y = dorian$y, gls_residuals = gls_residuals)
gam_geodata <- as.geodata(gam_data, coords.col = 1:2, data.col = 3)
lm_geodata <- as.geodata(lm_data, coords.col = 1:2, data.col = 3)
gls_geodata <- as.geodata(gls_data, coords.col = 1:2, data.col = 3)
aaaaa<-variog(gam_geodata,option="cloud")
## variog: computing omnidirectional variogram
variog_gam<-variog(gam_geodata,uvec=seq(0,aaaaa$max.dist,l=20),option="bin", estimator.type="modulus")
## variog: computing omnidirectional variogram
bbbbb<-variog(lm_geodata,option="cloud")
## variog: computing omnidirectional variogram
variog_lm<-variog(lm_geodata,uvec=seq(0,bbbbb$max.dist,l=20),option="bin", estimator.type="modulus")
## variog: computing omnidirectional variogram
ccccc<-variog(gls_geodata,option="cloud")
## variog: computing omnidirectional variogram
variog_gls<-variog(gls_geodata,uvec=seq(0,ccccc$max.dist,l=20),option="bin", estimator.type="modulus")
## variog: computing omnidirectional variogram
# Compute the semivariograms
#variog_gam <- variog(gam_geodata)
#variog_lm <- variog(lm_geodata)
#variog_gls <- variog(gls_geodata)
plot(variog_gam, main = "Semivariogram: GAM Residuals")
plot(variog_lm, main = "Semivariogram: LM Residuals")
plot(variog_gls, main = "Semivariogram: GLS Residuals")
GAM: sill is around 0.4, phi is around 500, and nugget is around 0.4 The semivariogram is very flat compared to other two, which means residuals are uncorrelated in space. This is a good sign because this means there is no spatial correlation structure in the residuals. This means this model has captured almost all the spatial correlation structure.
LM: sill is around 15, phi is around 500, and nugget is around 0. There is a rising pattern of semivariance of residuals as distance increases, meaning there are spatial correlation between residuals. So the model has not fully accounted for the spatial correlation in the data.
GLS: sill is around 15, phi is around 500, and nugget is around 0. Similarly, there is a rising pattern of semivariance of residuals as distance increases. The difference from LM is that GLS has a smoother trend in the bin plot, because GLS considers the relationship between observations based on their proximity
idw<-function(data,locs,newlocs,p){
dists<-rdist(newlocs,locs)
return(((dists^(-p))%*%data)/((dists^(-p))%*%rep(1,length(data))))
}
# testing different p
idwPred6<-idw(dorian$wind.sp,cbind(dorian$x,dorian$y),myGrid,p=6)
idwPred12<-idw(dorian$wind.sp,cbind(dorian$x,dorian$y),myGrid,p=12)
# plotting the results on our grid
image.plot(xs,ys,matrix(idwPred6,res,res),col=tim.colors(32), main="p=6")
points(dorian$x,dorian$y,pch=19,cex=0.3)
image.plot(xs,ys,matrix(idwPred12,res,res),col=tim.colors(32), main="p=12")
points(dorian$x,dorian$y,pch=19,cex=0.3)
\(\hat{z}_0 = \frac{\sum_{i} Z(s_i) d(s_i, s_0)^{-p}}{\sum_{i} d(s_i, s_0)^{-p}}\)
When p is small, distant points have a greater influence on the prediction. This leads to a smoother surface because the interpolation considers a larger neighborhood of points.
When p is large, distant points have a smaller influence on the prediction. This leads to a less smoother surface with sharper changes between areas with different wind speed.
Advantages:
IDW is computationally efficient and does not require complet data preprocessing.
IDW allows us to adjust the influence of distant points according to our will. For example, IDM allows us to gives more weight to nearby points, if local points are important for prediction.
IDW does not require to have assumption of any spatial stationarity.
Disadvantages:
The choice of p can significantly affect the results, so requires hyper-parameter tuning.
IDW only considers distance. IDW does not consider the spatial correlation structure of the data by using the variogram, like Kriging does.
There is no statistical estimation of the standard errors for
predictions in IDW